# Direct outputs eligible for public release

# GLOBAL SETTINGS --------------------------------------------------------------

options(
    scipen = 999,
    digits = 16,
    max.print = .Machine$integer.max,
    show.error.locations = TRUE,
    warn = 1
)

RNGkind("L'Ecuyer-CMRG")
seed <- 818675309L
set.seed(seed) # setting main seed

# PACKAGES ---------------------------------------------------------------------
library(data.table)
library(ggplot2)
library(scales)

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table
setDTthreads(threads = 1L)
options(datatable.print.class = TRUE, datatable.print.keys = TRUE)
# so that printing the data.table also shows the variable type on top

# BEGIN FILE -------------------------------------------------------------------

# READ IN OUR DATA -------------------------------------------------------------

moved_tract_dt <-
    readRDS(
        "~/estimation-output/wealth_effect_estimates_moved_tract.rds"
    )
setnames(moved_tract_dt, "ref_event_time", "event_time")
moved_tract_dt <-
    moved_tract_dt[
        model == "reduced_form" &
            event_time == 1 &
            income_quartile %in% c(1, 4, 5)
    ]
moved_tract_dt[, dependent_var := "moved_tract"]
moved_tract_dt[, model := NULL]

moved_tract_not_county_dt <-
    readRDS(
        "~/estimation-output/event_study_estimates_moved_tract_not_county.rds"
    )
setnames(moved_tract_not_county_dt, "ref_event_time", "event_time")
moved_tract_not_county_dt <-
    moved_tract_not_county_dt[event_time == 1 & income_quartile %in% c(1, 4, 5)]
moved_tract_not_county_dt[, dependent_var := "moved_tract_not_county"]

moved_county_not_state_dt <-
    readRDS(
        "~/estimation-output/event_study_estimates_moved_county_not_state.rds"
    )
setnames(moved_county_not_state_dt, "ref_event_time", "event_time")
moved_county_not_state_dt <-
    moved_county_not_state_dt[event_time == 1 & income_quartile %in% c(1, 4, 5)]
moved_county_not_state_dt[, dependent_var := "moved_county_not_state"]

moved_state_dt <-
    readRDS(
        "~/estimation-output/event_study_estimates_moved_state.rds"
    )
setnames(moved_state_dt, "ref_event_time", "event_time")
moved_state_dt <-
    moved_state_dt[event_time == 1 & income_quartile %in% c(1, 4, 5)]
moved_state_dt[, dependent_var := "moved_state"]

moving_combined_dt <-
    rbindlist(
        list(
            moved_tract_dt,
            moved_tract_not_county_dt,
            moved_county_not_state_dt,
            moved_state_dt
        ),
        use.names = TRUE
    )

# Make a share decomposition
moving_combined_dt[
    ,
    total_tract_move :=
        max(as.integer(dependent_var == "moved_tract") * estimate),
    by = .(income_quartile)
]
moving_combined_dt[, share_of_total := (estimate / total_tract_move) * 100]
moving_combined_dt <- moving_combined_dt[dependent_var != "moved_tract"]

# Produce graph labels
moving_combined_dt[, x_label := as.character(NA)]
moving_combined_dt[income_quartile == 5, x_label := "Full Sample"]
moving_combined_dt[income_quartile == 1, x_label := "Quartile 1"]
moving_combined_dt[income_quartile == 4, x_label := "Quartile 4"]
moving_combined_dt[
    ,
    x_label :=
        factor(
            x_label,
            levels = c("Full Sample", "Quartile 1", "Quartile 4")
        )
]

moving_combined_dt[
    dependent_var == "moved_state",
    y_label_text := "Moved State"
]
moving_combined_dt[
    dependent_var == "moved_county_not_state",
    y_label_text := "Moved County (Within State)"
]
moving_combined_dt[
    dependent_var == "moved_tract_not_county",
    y_label_text := "Moved Tract (Within County)"
]
moving_combined_dt[
    ,
    y_label_text :=
        factor(
            y_label_text,
            levels =
                c(
                    "Moved State",
                    "Moved County (Within State)",
                    "Moved Tract (Within County)"
                )
        )
]

# We will also want to label with the share,
# so will need to calculate the label position
setorderv(
    moving_combined_dt,
    c("income_quartile", "y_label_text"),
    order = c(1L, -1L)
)
moving_combined_dt[
    ,
    y_label_number := cumsum(share_of_total) - (0.5 * share_of_total),
    by = .(income_quartile)
]

# now want to format share of total
# 1 decimal, and then (**) for statistical significance
moving_combined_dt[
    ,
    one_star := as.integer((estimate - (1.64 * cluster_se)) > 0)
]
moving_combined_dt[
    ,
    two_star := as.integer((estimate - (1.96 * cluster_se)) > 0)
]
moving_combined_dt[
    ,
    three_star := as.integer((estimate - (2.58 * cluster_se)) > 0)
]
moving_combined_dt[
    one_star == 1,
    share_of_total_new :=
        paste0(round(share_of_total, digits = 1), "% ", "(\u22c6)")
]
moving_combined_dt[
    two_star == 1,
    share_of_total_new :=
        paste0(round(share_of_total, digits = 1), "% ", "(\u22c6\u22c6)")
]
moving_combined_dt[
    three_star == 1,
    share_of_total_new :=
        paste0(round(share_of_total, digits = 1), "% ", "(\u22c6\u22c6\u22c6)")
]
moving_combined_dt[
    is.na(share_of_total_new),
    share_of_total_new := paste0(round(share_of_total, digits = 1), "%")
]

# will need distinct colors for the above text
moving_combined_dt[y_label_text == "Moved State", text_color := "light"]
moving_combined_dt[y_label_text != "Moved State", text_color := "dark"]

text_colors <- c("#000000", "#999999")

figure_b_12 <-
    ggplot(
        data = moving_combined_dt,
        aes(x = x_label, y = share_of_total, fill = y_label_text)
    ) +
    geom_bar(stat = "identity") +
    geom_text(aes(
        y = y_label_number,
        label = share_of_total_new,
        color = factor(text_color)
    ),
    size = 3.25,
    show.legend = FALSE
    ) +
    geom_vline(xintercept = 1.5, linetype = 3) +
    theme_bw(base_size = 11) +
    scale_y_continuous(
        breaks = (seq.int(from = 0, to = 100, by = 10)),
        labels = scales::number_format(
            accuracy = 1,
            big.mark = ","
        ),
        expand = expansion(mult = c(0.01, 0.01))
    ) +
    theme(
        panel.grid.minor = element_blank(), # remove actual gridlines
        panel.grid.major = element_blank(),
        legend.position = "bottom",
        legend.title = element_blank(),
        legend.key.size = unit(1.5, "lines")
    ) +
    scale_fill_viridis_d() +
    scale_color_manual(values = text_colors) +
    labs(x = "Sample", y = "Decomposition of Effect on Moving (%)") +
    coord_cartesian(ylim = c(0, 100)) +
    guides(fill = guide_legend(nrow = 1, byrow = TRUE, reverse = TRUE))

ggsave(
    plot = figure_b_12,
    filename = "~/paper/figures/figure-B.12.png",
    width = 6,
    height = 4,
    dpi = 600
)

ggsave(
    plot = figure_b_12,
    filename = "~/paper/figures/figure-B.12.tif",
    width = 6,
    height = 4,
    device = "tiff",
    dpi = 600
)

# Housekeeping
moved_tract_dt <- NULL
moved_tract_not_county_dt <- NULL
moved_county_not_state_dt <- NULL
moved_state_dt <- NULL
moving_combined_dt <- NULL
text_colors <- NULL
figure_b_12 <- NULL
rm(
    moved_tract_dt,
    moved_tract_not_county_dt,
    moved_county_not_state_dt,
    moved_state_dt,
    moving_combined_dt,
    text_colors,
    figure_b_12
)